#path = 'https://raw.githubusercontent.com/umangkejriwal1122/Machine-Learning/master/Data%20Sets/covid_19_clean_complete.csv'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Load the cleaned COVID-19 dataset from the local download folder.
# NOTE(review): this absolute path is machine-specific.
df = pd.read_csv('C:/Users/VAIO/Downloads/COVID-19 AI ML/covid_19_clean_complete.csv')
df.head()

# Discard the 'Province/State' column.
df = df.drop(columns=['Province/State'])
df.head()

# Parse 'Date' into real datetimes; info() is printed before and after so the
# dtype change is visible.
df.info()
df = df.assign(Date=pd.to_datetime(df['Date']))
df.info()

# Use the shorter column name 'Country' from here on.
df = df.rename(columns={"Country/Region": "Country"})
df.head()

# Active cases = confirmed cases minus recoveries and deaths.
df['Active'] = df['Confirmed'].sub(df['Recovered']).sub(df['Deaths'])
df.tail()
#### Latest Data
# Keep only the rows recorded on the most recent date present in the dataset.
latest_date = df[df['Date'] == df['Date'].max()]
latest_date.head()

# Sum the case counts over all rows of each country.
# The columns are selected with a list: selecting several groupby columns with
# a bare tuple was deprecated in pandas 1.0 and raises in pandas 2.x.
world = latest_date.groupby('Country')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum()
world = world.reset_index()
world.head()

#### Plot on World Map (Active Cases)
# Countries are matched by name; the colour scale covers 1 to 10000 active cases.
figure = px.choropleth(
    world,
    locations='Country',
    locationmode='country names',
    color='Active',
    range_color=[1, 10000],
    color_continuous_scale='ylorrd',
    title='World Map Plot',
)
figure.show()
#### Plot WorldWide Confirmed Cases Over Date
# Total confirmed cases per date, summed over every row of the dataset.
World_Total_Confirmed = df.groupby('Date')['Confirmed'].sum().reset_index()
World_Total_Confirmed.tail()

# Seaborn point plot of the totals.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
plt.figure(figsize=(20, 8))
plt.xticks(rotation=90, fontsize=8)
sns.pointplot(
    x=World_Total_Confirmed['Date'].dt.date,
    y=World_Total_Confirmed['Confirmed'],
    color='red',
)
plt.show()

# The same series drawn with matplotlib: a red line with blue markers.
plt.figure(figsize=(12, 8))
plt.xticks(rotation=90, fontsize=7)
plt.plot(
    World_Total_Confirmed['Date'].dt.date,
    World_Total_Confirmed['Confirmed'],
    color='red',
)
plt.scatter(
    World_Total_Confirmed['Date'].dt.date,
    World_Total_Confirmed['Confirmed'],
    color='blue',
)
plt.show()
##### Top 20 Countries having most number of Active Cases
# Active cases per country on the latest date, largest first.
Active_Top_Countries = latest_date.groupby('Country')['Active'].sum().sort_values(ascending=False)
Active_Top_Countries = Active_Top_Countries.reset_index()
Active_Top_Countries.head()
Active_Top_20_Countries = Active_Top_Countries.head(20)

# Horizontal bar chart: counts on x, country names on y.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
plt.figure(figsize=(8, 8))
sns.barplot(
    x=Active_Top_20_Countries['Active'],
    y=Active_Top_20_Countries['Country'],
    label="Active Cases",
)
plt.xlabel("Active", fontdict={'fontsize': 15})
plt.ylabel("Country", fontdict={'fontsize': 15})
plt.title("Top 20 countries with Active COVID-19 cases", fontdict={'fontsize': 15})
plt.legend()
plt.show()
##### Top 20 Countries having most number of Recovered Cases
# Recovered cases per country on the latest date, largest first.
Recovered_Top_Countries = latest_date.groupby('Country')['Recovered'].sum().sort_values(ascending=False)
Recovered_Top_Countries = Recovered_Top_Countries.reset_index()
Recovered_Top_20_Countries = Recovered_Top_Countries.head(20)

# Vertical bar chart: country names on x, counts on y.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
plt.figure(figsize=(8, 8))
sns.barplot(
    x=Recovered_Top_20_Countries['Country'],
    y=Recovered_Top_20_Countries['Recovered'],
    label="Recovered",
)
plt.tick_params(axis="x", labelrotation=90, labelsize=12)
# Axis labels follow the plotted data: countries on x, recovered counts on y.
plt.xlabel("Country", fontdict={'fontsize': 15})
plt.ylabel("Recovered", fontdict={'fontsize': 15})
plt.title("Top 20 countries with Recovered COVID-19 cases", fontdict={'fontsize': 15})
plt.legend()
plt.show()
##### Top 20 Countries having most number of Confirmed Cases
# Confirmed cases per country on the latest date, largest first.
Confirmed_Top_Countries = latest_date.groupby('Country')['Confirmed'].sum().sort_values(ascending=False)
Confirmed_Top_Countries = Confirmed_Top_Countries.reset_index()
Confirmed_Top_20_Countries = Confirmed_Top_Countries.head(20)

# Vertical bar chart: country names on x, counts on y.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
plt.figure(figsize=(8, 8))
sns.barplot(
    x=Confirmed_Top_20_Countries['Country'],
    y=Confirmed_Top_20_Countries['Confirmed'],
    label="Confirmed",
)
plt.tick_params(axis="x", labelrotation=90, labelsize=12)
# Axis labels follow the plotted data: countries on x, confirmed counts on y.
plt.xlabel("Country", fontdict={'fontsize': 15})
plt.ylabel("Confirmed", fontdict={'fontsize': 15})
plt.title("Top 20 countries with Confirmed COVID-19 cases", fontdict={'fontsize': 15})
plt.legend()
plt.show()
# Overlay Recovered (green) on Confirmed (red) for the 20 countries with the
# most confirmed cases.
# Both bar layers must share the same countries in the same order; otherwise
# the second barplot call lays its own category order over the first one and
# the bars no longer line up with the country labels. Recovered counts are
# therefore looked up for the Confirmed top-20 countries rather than taken
# from a separately ranked Recovered list.
_recovered_by_country = latest_date.groupby('Country')['Recovered'].sum()
_top_20_countries = Confirmed_Top_20_Countries['Country']
_recovered_for_top_20 = _top_20_countries.map(_recovered_by_country)

plt.figure(figsize=(8, 8))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
sns.barplot(
    x=Confirmed_Top_20_Countries['Confirmed'],
    y=_top_20_countries,
    color='red',
    label="Confirmed",
)
sns.barplot(
    x=_recovered_for_top_20,
    y=_top_20_countries,
    color='green',
    label="Recovered",
)
plt.xlabel("Scale", fontdict={'fontsize': 15})
plt.ylabel("Country", fontdict={'fontsize': 15})
plt.title("Top 20 countries with Confirmed and Recovered COVID-19 cases", fontdict={'fontsize': 15})
plt.legend()
plt.show()
#### Make some dataframes for some countries
# Per-date totals for India.
# The columns are selected with a list: selecting several groupby columns with
# a bare tuple was deprecated in pandas 1.0 and raises in pandas 2.x.
India = df[df['Country'] == 'India']
India = India.groupby('Date')[['Recovered', 'Deaths', 'Active', 'Confirmed']].sum().reset_index()
India.tail()

# One line per metric against the date.
plt.figure(figsize=(5, 5))
plt.plot(India['Date'], India['Confirmed'], color='blue', label="Confirmed")
plt.plot(India['Date'], India['Active'], color='orange', label="Active")
plt.plot(India['Date'], India['Deaths'], color='red', label="Deaths")
plt.plot(India['Date'], India['Recovered'], color='green', label="Recovered")
plt.tick_params(axis="x", labelrotation=90, labelsize=12)
# Axis labels follow the plotted data: dates on x, case counts on y.
plt.xlabel("Date", fontdict={'fontsize': 15})
plt.ylabel("Count", fontdict={'fontsize': 15})
plt.title("India Covid-19 analysis", fontdict={'fontsize': 15})
plt.legend()
plt.show()
### Italy , US, China & India
# Per-date totals for each country.
# The columns are selected with a list: selecting several groupby columns with
# a bare tuple was deprecated in pandas 1.0 and raises in pandas 2.x.
_METRIC_COLUMNS = ['Recovered', 'Deaths', 'Active', 'Confirmed']
Italy = df[df['Country'] == 'Italy']
Italy = Italy.groupby('Date')[_METRIC_COLUMNS].sum().reset_index()
US = df[df['Country'] == 'US']
US = US.groupby('Date')[_METRIC_COLUMNS].sum().reset_index()
China = df[df['Country'] == 'China']
China = China.groupby('Date')[_METRIC_COLUMNS].sum().reset_index()

# Compare recoveries across the four countries. The x axis is each frame's
# row index after reset_index(), i.e. the position of the date in that
# country's series, not the calendar date itself.
_recovered_series = [
    (China, 'Red', 'China'),
    (India, 'Green', 'India'),
    (US, 'Black', 'US'),
    (Italy, 'Blue', 'Italy'),
]
plt.figure(figsize=(12, 8))
# Lines first (these carry the legend labels), then unlabeled markers on top.
for _frame, _colour, _name in _recovered_series:
    plt.plot(_frame.index, _frame['Recovered'], color=_colour, label=_name)
for _frame, _colour, _name in _recovered_series:
    plt.scatter(_frame.index, _frame['Recovered'], color=_colour)
plt.legend(loc=2)
plt.show()
#### Library - Prophet (formerly published as fbprophet)
#### Time-series forecasting library created by Facebook
#### The training frame needs two columns named ds (date) and y (value)
#### Where to apply time series -
#### 1. When the data is not constant
#### 2. When the data does not follow any known function
# The package was renamed from "fbprophet" to "prophet" in v1.0; try the
# current name first and fall back to the legacy one so either install works.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet
df.head()

# Worldwide confirmed cases per date, renamed to the ds / y column names.
confirmed = df.groupby('Date')['Confirmed'].sum().reset_index()
confirmed.head()
confirmed.columns = ['ds', 'y']

#### Build The Model
# interval_width=0.95 sets the width of the uncertainty interval reported as
# yhat_lower / yhat_upper.
model = Prophet(interval_width=0.95)

### Train the model
model.fit(confirmed)

# Extend the training dates by 7 further periods and predict over all of them.
future_dates = model.make_future_dataframe(periods=7)
future_dates.tail(10)
forecast = model.predict(future_dates)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)
confirmed.tail()
confirmed_plot = model.plot(forecast)
##### Top 20 Countries by Confirmed Cases with Recovered overlaid (disabled draft)
# top_data = latest_date.groupby('Country')['Confirmed','Recovered'].sum()
# top_data = top_data.sort_values('Confirmed',ascending=False)
# top_data = top_data.reset_index()
# top_data.head()
# top_20 = top_data.head(20)
# plt.figure(figsize=(8,8))
# sns.barplot(top_20['Confirmed'],top_20['Country'],color='red')
# sns.barplot(top_20['Recovered'],top_20['Country'],color='green')
# plt.show()
# Forecast confirmed cases for India only.
India_Prediction = df[df['Country'] == 'India']
India_Prediction.tail()

# Confirmed cases per date, renamed to the ds / y column names Prophet expects.
grouped_India_Prediction = India_Prediction.groupby('Date')['Confirmed'].sum().reset_index()
grouped_India_Prediction = grouped_India_Prediction.rename(columns={'Date': 'ds', 'Confirmed': 'y'})
grouped_India_Prediction.tail()

#### Build The Model
india_model = Prophet(interval_width=0.95)

### Train the model
india_model.fit(grouped_India_Prediction)

# Build the future frame from the India model itself (not the worldwide
# `model`), so the dates come from the series this model was trained on.
future_prediction_dates = india_model.make_future_dataframe(periods=7)
future_prediction_dates.tail(7)
prediction = india_model.predict(future_prediction_dates)
prediction.head(3)

# Keep the date, point forecast and interval bounds under friendlier names.
# rename() returns a new frame here; renaming in place on a column slice of
# `prediction` triggers pandas' SettingWithCopyWarning.
prediction_result = prediction[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].rename(
    columns={'yhat': 'predicted', 'yhat_lower': 'lower_limit', 'yhat_upper': 'upper_limit'}
)
prediction_result.tail(8)
#### Trend chart for the month of April up to the last forecast day
# (This heading used to be a bare line of text, which is a SyntaxError in a
# .py file; it is now a comment.)
# Last 40 rows of the prediction table.
prediction_month = prediction_result.iloc[-40:, :]
#import datetime
plt.figure(figsize=(15, 5))
plt.plot(prediction_month['ds'], prediction_month['predicted'], color='blue', label="Predicted")
plt.tick_params(axis="x", labelrotation=90, labelsize=12)
#plt.xlim([datetime.date(2020,4,1), datetime.date(2020, 4, 15)])
plt.xlabel("Date", fontdict={'fontsize': 15})
plt.ylabel("Count", fontdict={'fontsize': 15})
plt.title("India Covid-19 predictions for April Month", fontdict={'fontsize': 15})
plt.legend()
plt.show()
# Plot only the last seven rows of the prediction table.
prediction_dates = prediction_result.tail(7)

_label_font = {'fontsize': 15}
plt.figure(figsize=(10, 5))
plt.plot(
    prediction_dates['ds'],
    prediction_dates['predicted'],
    color='red',
    label="Predicted",
)
# Rotate the date tick labels so they do not overlap.
plt.tick_params(axis="x", labelrotation=90, labelsize=12)
plt.xlabel("Date", fontdict=_label_font)
plt.ylabel("Count", fontdict=_label_font)
plt.title("India Covid-19 future 7 days predictions", fontdict=_label_font)
plt.legend()
plt.show()